April Wright
08.09.2018
-Tell homology from convergence
-Trace the origins of structures
-Taxonomy
-Taxonomy
# Simulate a five-tip tree with phytools and draw it with enlarged tip labels.
library(phytools)
tree <- pbtree(n = 5)
plot(
  tree,
  edge.width = 1.5,
  no.margin = TRUE,
  cex = 3.5
)
Tip: What we are putting on the tree. May be species, individuals, or higher-order taxa.
May be called terminal node, leaf, or one-degree node.
library(phytools)

# No seed is set in this chunk, so the simulated tree differs between runs.
tree <- pbtree(n = 5)
plot(tree,
     no.margin = TRUE,
     cex = 3.5,
     edge.width = 1.5)
Branch: What connects the tip to the tree. Can have a variety of units, which we will discuss over the next few days.
May be called edge.
library(phytools)

# Simulate a five-tip tree, draw it, then number the internal nodes on the plot.
tree <- pbtree(n = 5)
plot(
  tree,
  cex = 3.5,
  edge.width = 1.5,
  no.margin = TRUE
)
nodelabels()
Node: Where branches meet, implying a most recent common ancestor.
May be called vertex, or three-degree node.
# The same tree in three layouts: top-down, fan, and the default.
plot(tree, direction = "downwards", edge.width = 1.5, cex = 3.5, no.margin = TRUE)
plot(tree, type = "fan", edge.width = 1.5, cex = 3.5, no.margin = TRUE)
plot(tree, edge.width = 1.5, cex = 3.5, no.margin = TRUE)

# Number the internal nodes on the last plot.
nodelabels(cex = 3.5)

# Optional: rotate the descendants of nodes 7 and 8 and redraw.
#rotateNodes(tree, c(7, 8))
#plot(tree, cex = 3.5, no.margin = TRUE, edge.width = 1.5)

# Test whether t1 and t2 form a monophyletic group; plot = TRUE also draws
# the tree with those tips highlighted.
is.monophyletic(
  tree,
  c("t1", "t2"),
  plot = TRUE,
  cex = 3.5,
  no.margin = TRUE,
  edge.width = 1.5
)
[1] FALSE
# Template for re-rooting at a chosen node: reroot(tree, node.number)
plot(
  tree,
  no.margin = TRUE,
  edge.width = 1.5,
  cex = 3.5
)
Ingroup: Taxa of interest
Outgroup: A closely related taxon used to root the tree
# Remove the root from the tree and draw the unrooted version.
unroot_tree <- unroot(tree)
plot(
  unroot_tree,
  edge.width = 1.5,
  cex = 3.5,
  no.margin = TRUE
)
# Read the bear character matrix and show the first three taxa.
library(alignfigR)
char_data <- read_alignment("data/bears_fasta.fa")
head(char_data, n = 3)
$Agriarctos_spp
[1] "?" "0" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "0"
[18] "0" "0" "1" "1" "1" "1" "0" "0" "1" "?" "1" "1" "?" "0" "1" "1" "1"
[35] "1" "0" "1" "1" "0" "?" "?" "0" "1" "1" "1" "0" "?" "?" "?" "?" "?"
[52] "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?"
$Ailurarctos_lufengensis
[1] "?" "0" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?"
[18] "0" "0" "1" "1" "1" "1" "0" "1" "1" "?" "1" "1" "?" "0" "?" "?" "?"
[35] "?" "0" "1" "1" "1" "?" "0" "0" "1" "1" "1" "0" "1" "0" "1" "1" "0"
[52] "1" "1" "?" "?" "?" "?" "?" "?" "?" "?" "?"
$Ailuropoda_melanoleuca
[1] "1" "0" "1" "1" "1" "1" "0" "1" "1" "0" "1" "0" "0" "1" "0" "0" "0"
[18] "0" "0" "1" "1" "1" "1" "0" "1" "0" "1" "1" "1" "0" "0" "1" "0" "1"
[35] "0" "0" "1" "1" "0" "0" "0" "0" "1" "1" "1" "0" "1" "0" "0" "1" "0"
[52] "1" "1" "0" "0" "0" "1" "0" "0" "0" "1" "0"
library(alignfigR)

# Each list element is one taxon; each vector entry is one character state.
char_data <- read_alignment("data/bears_fasta.fa")
char_data[seq_len(3)]
$Agriarctos_spp
[1] "?" "0" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "0"
[18] "0" "0" "1" "1" "1" "1" "0" "0" "1" "?" "1" "1" "?" "0" "1" "1" "1"
[35] "1" "0" "1" "1" "0" "?" "?" "0" "1" "1" "1" "0" "?" "?" "?" "?" "?"
[52] "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?"
$Ailurarctos_lufengensis
[1] "?" "0" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?"
[18] "0" "0" "1" "1" "1" "1" "0" "1" "1" "?" "1" "1" "?" "0" "?" "?" "?"
[35] "?" "0" "1" "1" "1" "?" "0" "0" "1" "1" "1" "0" "1" "0" "1" "1" "0"
[52] "1" "1" "?" "?" "?" "?" "?" "?" "?" "?" "?"
$Ailuropoda_melanoleuca
[1] "1" "0" "1" "1" "1" "1" "0" "1" "1" "0" "1" "0" "0" "1" "0" "0" "0"
[18] "0" "0" "1" "1" "1" "1" "0" "1" "0" "1" "1" "1" "0" "0" "1" "0" "1"
[35] "0" "0" "1" "1" "0" "0" "0" "0" "1" "1" "1" "0" "1" "0" "0" "1" "0"
[52] "1" "1" "0" "0" "0" "1" "0" "0" "0" "1" "0"
These data are binary
library(alignfigR)

# Re-read the alignment for this slide and print the first three taxa.
char_data <- read_alignment("data/bears_fasta.fa")
char_data[c(1, 2, 3)]
$Agriarctos_spp
[1] "?" "0" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "0"
[18] "0" "0" "1" "1" "1" "1" "0" "0" "1" "?" "1" "1" "?" "0" "1" "1" "1"
[35] "1" "0" "1" "1" "0" "?" "?" "0" "1" "1" "1" "0" "?" "?" "?" "?" "?"
[52] "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?"
$Ailurarctos_lufengensis
[1] "?" "0" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?" "?"
[18] "0" "0" "1" "1" "1" "1" "0" "1" "1" "?" "1" "1" "?" "0" "?" "?" "?"
[35] "?" "0" "1" "1" "1" "?" "0" "0" "1" "1" "1" "0" "1" "0" "1" "1" "0"
[52] "1" "1" "?" "?" "?" "?" "?" "?" "?" "?" "?"
$Ailuropoda_melanoleuca
[1] "1" "0" "1" "1" "1" "1" "0" "1" "1" "0" "1" "0" "0" "1" "0" "0" "0"
[18] "0" "0" "1" "1" "1" "1" "0" "1" "0" "1" "1" "1" "0" "0" "1" "0" "1"
[35] "0" "0" "1" "1" "0" "0" "0" "0" "1" "1" "1" "0" "1" "0" "0" "1" "0"
[52] "1" "1" "0" "0" "0" "1" "0" "0" "0" "1" "0"
Always arranged with rows being taxa and columns corresponding to characters - “matrix” structure
Example character from Brady:
How do we know we've captured the relevant character axes?
# Draw the character matrix as a coloured grid with taxon names on the axis.
library(ggplot2)
colors <- c("blue", "purple", "white")
plot_alignment(char_data, colors, taxon_labels = TRUE) +
  theme(text = element_text(size = 40))
library(ggplot2)

# Three fill colours are supplied to plot_alignment().
colors <- c("blue", "purple", "white")

# Enlarge all text so the figure is readable on a slide.
plot_alignment(
  char_data,
  colors,
  taxon_labels = TRUE
) +
  theme(text = element_text(size = 40))
How do we go from this to a tree?
??? Have them start installs on the next page while we do this.
# Load the alignment and a starting tree, then build the table of character
# windows that treesiftr will step through.
library(treesiftr)
aln_path <- "data/bears_fasta.fa"
bears <- read_alignment(aln_path)
tree <- read.tree("data/starting_tree.tre")
sample_df <- generate_sliding(
  bears,
  start_char = 1,
  stop_char = 5,
  steps = 1
)
print(sample_df)
starting_val stop_val step_val
1 1 2 1
2 2 3 1
3 3 4 1
4 4 5 1
5 5 6 1
# Build one tree visualisation per row of sample_df; pscore = TRUE requests
# the parsimony score for each.
library(phangorn)
library(ggtree)
output_vector <- generate_tree_vis(
  sample_df = sample_df,
  alignment = aln_path,
  tree = tree,
  phy_mat = bears,
  pscore = TRUE
)
Final p-score 2 after 0 nni operations
Final p-score 2 after 0 nni operations
Final p-score 2 after 0 nni operations
Final p-score 2 after 1 nni operations
Final p-score 2 after 1 nni operations
# Sample output - you will get more than this when you run it in your console.
output_vector
[[1]]
[[2]]
[[3]]
[[4]]
[[5]]
??? Do a couple trees on the board, including the pruning algorithm. Then allow them to play.
??? This is one character. Imagine many - enumeration is not possible. Also note that several trees have the same “best” score.
[PAUP* session: load the bear morphology matrix, inspect it, then search.]
execute data/bears_morphology.nex
[Summaries of character status and taxon status.]
cstatus
tstatus
[Print the data matrix and the pairwise distance matrix.]
showmatrix
showdist
[Start logging the session to a file before running the search.]
log file="mylogfile"
[Exhaustive search: evaluates every possible tree.]
alltrees
What happened here?
??? This is one character. Imagine many - enumeration is not possible. Also note that several trees have the same “best” score.
Heuristic - use of shortcuts to reduce the number of trees we need to search
[Heuristic search with PAUP*'s default settings.]
hsearch
Heuristic - use of shortcuts to reduce the number of trees we need to search
[Heuristic search using nearest-neighbour interchange (NNI) branch swapping.]
hsearch swap = nni
Heuristic - use of shortcuts to reduce the number of trees we need to search
[Heuristic search using subtree pruning and regrafting (SPR) branch swapping.]
hsearch swap = spr
[Write trees 1 to 3 from memory to separate files so R can read them one at a time.]
savetrees from=1 to=1 file=results/tree1.tre;
savetrees from=2 to=2 file=results/tree2.tre;
savetrees from=3 to=3 file=results/tree3.tre;
# Read the three trees PAUP* saved into the results/ directory.
library(ape)
tree1 <- read.nexus(file.path("results", "tree1.tre"))
tree2 <- read.nexus(file.path("results", "tree2.tre"))
tree3 <- read.nexus(file.path("results", "tree3.tre"))
??? This is one character. Imagine many - enumeration is not possible. Also note that several trees have the same “best” score.
[Consensus of all trees in memory, written to the lowercase results/ directory used by the savetrees commands.]
contree all / treefile=results/contree.tre;
help contree
# Convert the character list to a phangorn phyDat object with user-defined
# states 0, 1 and "?".
char_mat <- phangorn::phyDat(char_data, levels = c(0, 1, "?"), type = "USER")

# Take the number of characters from the data rather than hard-coding 62.
n_chars <- length(char_data[[1]])

# One bootstrap pseudoreplicate: draw character indices with replacement.
samples <- sample(seq_len(n_chars), n_chars, replace = TRUE)

# site.pattern = FALSE makes `select` index individual characters; the default
# (TRUE) indexes phyDat's compressed site patterns instead.
new_mat <- subset(char_mat, select = samples, site.pattern = FALSE)
samples
[1] 6 8 22 13 11 45 16 6 43 33 43 32 47 60 59 25 14 33 2 18 22 54 25
[24] 12 21 20 56 43 3 12 10 34 57 25 9 18 22 20 12 53 1 12 20 44 40 22
[47] 18 12 6 21 3 4 54 55 25 59 48 15 38 25 6 42
# Convert the character list to a phangorn phyDat object with user-defined
# states 0, 1 and "?".
char_mat <- phangorn::phyDat(char_data, levels = c(0, 1, "?"), type = "USER")

# Take the number of characters from the data rather than hard-coding 62.
n_chars <- length(char_data[[1]])
n_reps <- 100

# Preallocate the result list instead of growing it inside the loop.
list_of_mats <- vector("list", n_reps)
for (i in seq_len(n_reps)) {
  # Each replicate resamples the characters with replacement.
  samples <- sample(seq_len(n_chars), n_chars, replace = TRUE)
  # site.pattern = FALSE makes `select` index individual characters; the
  # default (TRUE) indexes phyDat's compressed site patterns instead.
  new_mat <- subset(char_mat, select = samples, site.pattern = FALSE)
  list_of_mats[[i]] <- new_mat
}
Generates a tree from each matrix
# Draw the tree that was read from results/tree1.tre.
plot(tree1)
[100 bootstrap replicates, each analysed with a heuristic search using random addition sequences.]
bootstrap nreps=100 search=heuristic /addseq=random;
[Save the tree with the bootstrap proportions stored as node labels.]
savetrees from=1 to=1 savebootp=nodelabels file=results/bootstrap.tre;
# Read the bootstrap tree written by PAUP* and draw it.
b_tre <- read.nexus("results/bootstrap.tre")
plot(b_tre)
# ape stores node labels in the `node.label` element of a phylo object;
# `b_tre$nodelabels` does not exist and returns NULL, so the support values
# would not be drawn.
nodelabels(b_tre$node.label)
[Bootstrap the characters 100 times, running a heuristic search on each replicate.]
bootstrap nreps=100 search=heuristic /addseq=random;
[savebootp=nodelabels stores the support values as node labels in the saved tree.]
savetrees from=1 to=1 savebootp=nodelabels file=results/bootstrap.tre;
# Load the bootstrap tree and annotate each internal node with its stored label.
b_tre <- read.nexus("results/bootstrap.tre")
plot(b_tre)
nodelabels(b_tre[["node.label"]])
“b_tre$node.label” - what is this construction?
Samples without replacement
[100 jackknife replicates, each analysed with a heuristic search using random addition sequences.]
jackknife nreps=100 search=heuristic /addseq=random;
[Save the tree with the support values stored as node labels.]
savetrees from=1 to=1 savebootp=nodelabels file=results/jackknife.tre;
# Load the jackknife tree and annotate each internal node with its stored label.
j_tre <- read.nexus("results/jackknife.tre")
plot(j_tre)
nodelabels(j_tre[["node.label"]])
Is an answer that implies homoplasy ever the right one?
[Define a custom two-state character type through a step matrix.]
begin assumptions;
[Rows are the "from" state and columns the "to" state; "." marks the diagonal.]
[NOTE(review): as written, 0 to 1 costs one step and 1 to 0 costs zero - confirm this asymmetry is intended.]
usertype my_ctype stepmatrix=2
0 1
[0] . 1
[1] 0 .
;
end;
[Apply the custom type to character 1, then repeat the heuristic search.]
ctype my_ctype:1
hsearch
For those of you who are more familiar with likelihood and Bayesian analyses, what does this sound like?
We'll pick up this afternoon with Bayesian models